from django.test import TestCase

# Create your tests here.

import PyPDF2


def extract_text_between_substring(file_path, start_str, end_str):
    """Collect the text found between two marker strings on each PDF page.

    Every page is searched independently: the first occurrence of
    ``start_str`` is located, then the first occurrence of ``end_str`` that
    follows it. The text strictly between the two markers is kept. A page
    that lacks such a pair contributes nothing, and a span that starts on one
    page and ends on another is not matched.

    Args:
        file_path: Path of the PDF file to read.
        start_str: Marker that opens the span (excluded from the result).
        end_str: Marker that closes the span (excluded from the result).

    Returns:
        The per-page matches concatenated in page order, or ``""`` when no
        page contains a match.
    """
    matches = []
    with open(file_path, 'rb') as file:
        # PdfReader / .pages replace the PdfFileReader / numPages / getPage
        # names, which were deprecated and then removed in PyPDF2 3.x.
        pdf_reader = PyPDF2.PdfReader(file)
        for page in pdf_reader.pages:
            pdf_text = page.extract_text()

            # Locate the start marker; skip the page when it is absent.
            start_index = pdf_text.find(start_str)
            if start_index == -1:
                continue
            content_start = start_index + len(start_str)

            # Look for the end marker only after the start marker, so an
            # earlier stray occurrence of end_str does not hide a valid span.
            end_index = pdf_text.find(end_str, content_start)
            if end_index == -1:
                continue

            matches.append(pdf_text[content_start:end_index])

    return "".join(matches)


# Usage example
pdf_files = ['file1.pdf', 'file2.pdf']
start_str = "开始字符串"
end_str = "结束字符串"

# Run the demo only when this file is executed as a script, so that merely
# importing the module (e.g. during test discovery) does not try to open the
# sample PDF files.
if __name__ == "__main__":
    for pdf_file in pdf_files:
        between_text = extract_text_between_substring(pdf_file, start_str, end_str)
        print(between_text)